Packages
pacman::p_load(
dplyr,
tidyverse,
readxl,
vegan,
reshape2,
RColorBrewer,
data.table,
grid,
ggh4x,
ggsci)
# setwd("D:/PhD/01_Data/01_Baseline/Model_1") # Windows
# setwd("/Volumes/Yiming_Wang/PhD/01_Data/01_Baseline/Model_1/Tissue") # Mac
setwd("/Volumes/Yiming_Wang/PhD/15_Manuscript/Submission/07_ISME/Review comments/Revision data/26 April 2024")#Yiming's imac pro
df_all <- read_excel("Baseline_S47_Tissue.xlsx") %>%
filter(SampleType != "Faeces") %>%
mutate(GenotypeSex_Type = paste(GenotypeSex, SampleType, sep="_")) %>%
mutate(Genotype=factor(Genotype,levels=c("WT","KO"))) %>%
mutate(Sex=factor(Sex,levels=c("Male"))) %>%
mutate(GenotypeSex = factor(GenotypeSex,levels = c("WT Male","KO Male"))) %>%
mutate(GenotypeSex_Type = factor(GenotypeSex_Type,
levels = c("WT Male_ST1_T","WT Male_ST2_T","WT Male_LT","WT Male_Faeces","KO Male_ST1_T","KO Male_ST2_T","KO Male_LT","KO Male_Faeces")))%>%
mutate(Sample=row_number()) %>%
relocate(Sample,GenotypeSex_Type)
df_all$Sample <- sprintf("%02d", as.numeric(df_all$Sample))
# For future facet_grid()
df_all<- df_all %>%
mutate(Sample = factor(paste0("Sample", " ",df_all$Sample))) %>%
mutate(FigureID =row_number()) %>%
relocate(FigureID) %>%
mutate(FigureID=ifelse(FigureID %in% c(1,2,3), 1,
ifelse(FigureID %in% c(4,5,6),2,
ifelse(FigureID %in% c(7,8,9),3,
ifelse(FigureID %in% c(10,11,12),4,
ifelse(FigureID %in% c(13,14,15),5,
ifelse(FigureID %in% c(16,17,18),1,
ifelse(FigureID %in% c(19,20,21),2,
ifelse(FigureID %in% c(22,23,24),3,
ifelse(FigureID %in% c(25,26,27),4,5))))))))))
# Subgroup by variables
#30436d WT Male
#9ea5c2 WT Female
#654e3a KO Male
#bcaba6 KO Female
#3C5488B2 WT
#7E6148B2 KO
#D98594FF Female
#94C5CCFF Male
Shape data
Define Core taxa
df_presence <- df_all %>%
gather(Taxa, Abundance, -c(1:9)) %>%
select(1,2,3,4,5,8,9,10,11) %>%
filter(Abundance != 0) %>% # save the taxa of which abundance is not equal to 0
group_by(Taxa) %>%
summarise(n=n_distinct(SequenceID)) %>% #get number of samples that have non-zero abundance bacteria as grouped by bacteria#
mutate(n.sample=nrow(df_all)) %>%
mutate(p=n/n.sample)
# Sample size is small, variance is large and three different tissue types has different composition, thus the presence criteria was set as 0.5 than 0.95
df_presence_core <- df_presence %>%
filter (p >= 0.6)
df_abundance <- df_all %>%
gather(Taxa, Abundance, -c(1:9)) %>% # transform to long table
select(1,2,3,4,5,8,9,10,11) %>%
group_by(Taxa) %>%
summarise(mean_abundance=mean(Abundance,na.rm=T),
median_abundance = median(Abundance,na.rm=T))
df_abundance_core <- df_abundance %>%
filter(mean_abundance>0.0001)
df_all_core <- left_join(df_presence_core, df_abundance_core, by="Taxa")
df_for_figure <- df_all %>%
gather(Taxa, Abundance, -c(1:9)) %>%
mutate(Taxa = ifelse(Taxa %in% df_all_core$Taxa, Taxa, "Non-core taxa")) %>%
group_by(FigureID, Sample, SequenceID, Genotype, Sex, GenotypeSex, Taxa, SampleType, GenotypeSex_Type) %>%
summarise(Abundance = sum(Abundance)) %>%
spread (Taxa, Abundance)
Melt the dataframe
#Create dataset with just headings and all taxa data
all_taxa1 <- df_for_figure %>% subset(select = c(2,9:ncol(df_for_figure)))
#Create dataset with just headings and all metadata
all_taxa1_names <- df_for_figure[,1:8]
#"Melt" data so it becomes a long list of each cell
all_taxa2<- melt(all_taxa1, id = c("Sample"))
all_taxa2$Mice <- factor(all_taxa2$Sample, levels=unique(all_taxa2$Sample)) #factor the ID column
#Add metadata to melt
all_taxa2_names <- left_join(all_taxa2, all_taxa1_names, by="Sample")
Figure
Sort taxa and Define color
#Generate bar plot with separate lengend
# Choose the most abundant taxa as the first bar and decedent the samples
order<-all_taxa2_names%>%
filter(variable == "Lactobacillus")%>%
arrange(desc(value))
# Factor your oder
order$Sample<-factor(order$Sample,levels=unique(order$Sample))
# Apply the factor level to your original data
all_taxa2_names$Sample<-factor(all_taxa2_names$Sample,levels = order$Sample)
# Define color
mypal = pal_simpsons("springfield", alpha = 0.7)(14)
mypal
## [1] "#FED439B2" "#709AE1B2" "#8A9197B2" "#D2AF81B2" "#FD7446B2" "#D5E4A2B2"
## [7] "#197EC0B2" "#F05C3BB2" "#46732EB2" "#71D0F5B2" "#370335B2" "#075149B2"
## [13] "#C80813B2" "#91331FB2"
library("scales")
show_col(mypal)

set.seed(860) #860
mypal<-sample(mypal)
show_col(mypal)

Bar plot
# Draw bar plot
figure <- ggplot(all_taxa2_names, aes(x = FigureID, fill = reorder(variable,-value), y = value))+ #reorder taxa based on their relative abundance
geom_bar(position="fill", stat="identity", linetype = 1, width = 0.7, colour = 'black',size = 0.2)+ #can replace colour = "black" with linetype = 0 for no outline
theme(axis.title = element_text(face="plain", size = 10),
axis.title.x=element_text(size =10),
axis.title.y=element_text(size =20, margin = margin(r = 5)),
axis.text.x=element_blank(),
axis.text.y=element_text(colour="black", face="plain", size=15),
axis.ticks.x=element_blank(),
plot.title = element_text(size=10, hjust=0.5, face="plain"),
legend.position = "right",
plot.background = element_rect(fill="transparent", colour = "transparent"))+
guides(fill=guide_legend(ncol=1))+
scale_y_continuous(expand = c(0,0),labels=scales::percent) +
labs(x = "", y = "Relative Abundance (%)", fill = "OTU")+
scale_fill_manual(values = c(mypal)) +
facet_grid(GenotypeSex~SampleType,scales = "fixed") + theme(strip.text = element_text(colour = 'white', size = rel(2)))
figure

Legend
## Three columns
all_bar_legend_set <- ggplot(all_taxa2_names, aes(x = Sample, fill = reorder(variable,-value), y = value))+
geom_bar(position="fill", stat="identity", linetype = 1, width = 0.8, color = "black", size = 0.3)+
theme(legend.title = element_text(size = 8, face = "bold"), legend.text = element_text(size = 6, face = "bold", colour = "black"), legend.key.size = unit(0.2, "cm"))+
scale_fill_manual(values = c(mypal))+
guides(fill=guide_legend(ncol=3))
all_bar_legend <- cowplot::get_legend(all_bar_legend_set)
grid.newpage()
grid.draw(all_bar_legend)

## Two columns
draw_key_polygon3 <- function(data, params, size) {
lwd <- min(data$size, min(size) / 4)
grid::rectGrob(
width = grid::unit(0.6, "npc"),
height = grid::unit(0.6, "npc"),
gp = grid::gpar(
col = data$colour,
fill = alpha(data$fill, data$alpha),
lty = data$linetype,
lwd = lwd * .pt,
linejoin = "mitre"
))
}
# register new key drawing function,
# the effect is global & persistent throughout the R session
GeomBar$draw_key = draw_key_polygon3
all_bar_legend_set <- ggplot(all_taxa2_names, aes(x = Mice, fill = reorder(variable,-value), y = value))+
geom_bar(position="fill", stat="identity", linetype = 1, width = 0.8, color = "black", size = 0.3)+
theme(legend.title = element_text(size = 8, face = "bold"),
legend.text = element_text(size = 12, face = "plain", colour = "black"),
legend.spacing.y = unit(1, 'cm'),
legend.key = element_rect(color = NA, fill = NA),
legend.key.size = unit(0.6, "cm"))+
scale_fill_manual(values = c(mypal))+
guides(fill=guide_legend(ncol=2,override.aes = list(colour = "black", size = 0.25)))
all_bar_legend_set

all_bar_legend <- cowplot::get_legend(all_bar_legend_set)
grid.newpage()
grid.draw(all_bar_legend)
